In [12]:

    
import pandas as pd
import numpy as np



In [13]:

    
# Relative locations of the two input CSV files used by this notebook.
properties_file = '../data/uflorida-features.csv'  # one row of properties per matrix
timing_file = '../data/all_results_janus_single_node_1-14-17.csv'  # solver timing results



In [14]:

    
# Load both CSVs, treating the first line of each file as the header row.
timings, properties = (
    pd.read_csv(csv_path, header=0)
    for csv_path in (timing_file, properties_file)
)

Renaming the columns to simpler names for ease of use.



In [15]:

    
# Short labels for the eight timing-result columns, assigned by position.
timings.columns = [
    'np', 'matrix', 'solver', 'prec',
    'status', 'time', 'iters', 'resid',
]

# Short labels for the 36 matrix-property columns, assigned by position.
# NOTE(review): 'min_nnz_row' is listed twice (3rd and 18th entries), so the
# frame ends up with two columns sharing that label - confirm this is intended.
properties.columns = [
    'rows', 'cols', 'min_nnz_row',
    'row_var', 'col_var', 'diag_var',
    'nnz',
    'frob_norm', 'symm_frob_norm', 'antisymm_frob_norm',
    'one_norm',
    'inf_norm', 'symm_inf_norm', 'antisymm_inf_norm',
    'max_nnz_row',
    'trace', 'abs_trace',
    'min_nnz_row', 'avg_nnz_row',
    'dummy_rows', 'dummy_rows_kind',
    'num_value_symm_1', 'nnz_pattern_symm_1',
    'num_value_symm_2', 'nnz_pattern_symm_2',
    'row_diag_dom', 'col_diag_dom',
    'diag_avg', 'diag_sign', 'diag_nnz',
    'lower_bw', 'upper_bw',
    'row_log_val_spread', 'col_log_val_spread',
    'symm',
    'matrix',
]

Combining the two dataframes into a single dataframe called `combined`.

Replacing the string data with numerical data.



In [16]:

    
# Attach each matrix's properties to every timing run recorded for that matrix.
# 'matrix' is the only column label the two frames share, so it is named
# explicitly here: the join key then stays fixed even if another shared label
# is introduced later, instead of silently becoming part of the key.
combined = pd.merge(properties, timings, how='inner', on='matrix')

# Summarise dtypes and non-null counts of the merged frame.
combined.info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 275286 entries, 0 to 275285
Data columns (total 43 columns):
rows                  275286 non-null int64
cols                  275286 non-null int64
min_nnz_row           275286 non-null int64
row_var               275286 non-null float64
col_var               275286 non-null float64
diag_var              275286 non-null float64
nnz                   275286 non-null int64
frob_norm             275286 non-null float64
symm_frob_norm        275286 non-null float64
antisymm_frob_norm    275286 non-null float64
one_norm              275286 non-null float64
inf_norm              275286 non-null float64
symm_inf_norm         275286 non-null float64
antisymm_inf_norm     275286 non-null float64
max_nnz_row           275286 non-null int64
trace                 275286 non-null float64
abs_trace             275286 non-null float64
min_nnz_row           275286 non-null int64
avg_nnz_row           275286 non-null int64
dummy_rows            275286 non-null int64
dummy_rows_kind       275286 non-null int64
num_value_symm_1      275286 non-null int64
nnz_pattern_symm_1    275286 non-null int64
num_value_symm_2      275286 non-null float64
nnz_pattern_symm_2    275286 non-null float64
row_diag_dom          275286 non-null int64
col_diag_dom          275286 non-null int64
diag_avg              275286 non-null float64
diag_sign             275286 non-null int64
diag_nnz              275286 non-null int64
lower_bw              275286 non-null int64
upper_bw              275286 non-null int64
row_log_val_spread    275286 non-null float64
col_log_val_spread    275286 non-null float64
symm                  275286 non-null int64
matrix                275286 non-null object
np                    275286 non-null int64
solver                275286 non-null object
prec                  275286 non-null object
status                275286 non-null object
time                  275286 non-null float64
iters                 156188 non-null float64
resid                 119113 non-null float64
dtypes: float64(20), int64(19), object(4)
memory usage: 92.4+ MB



In [17]:

    
# Keep only the rows that have a value in every column (drop a row if any
# of its entries is missing).
combined = combined.dropna(axis=0, how='any')



In [18]:

    
# Add integer-coded companions for the three string-valued columns.

# Solver name -> integer code.
combined['solver_num'] = combined['solver'].map({
    'FIXED_POINT': 0,
    'BICGSTAB': 1,
    'MINRES': 2,
    'PSEUDOBLOCK_CG': 3,
    'PSEUDOBLOCK_STOCHASTIC_CG': 4,
    'PSEUDOBLOCK_TFQMR': 5,
    'TFQMR': 6,
    'LSQR': 7,
    'PSEUDOBLOCK_GMRES': 8,
}).astype(int)

# Preconditioner name -> integer code.
combined['prec_num'] = combined['prec'].map({
    'ILUT': 0,
    'RILUK': 1,
    'RELAXATION': 2,
    'CHEBYSHEV': 3,
    'NONE': 4,
}).astype(int)

# Run status -> integer code.
combined['status_num'] = combined['status'].map({
    'error': -1,
    'unconverged': 0,
    'converged': 1,
}).astype(int)

None of the above should be changed



In [19]:

    
# Restrict the analysis to runs whose status is exactly 'converged'.
good = combined.loc[combined['status'].eq('converged')]



In [20]:

    
# Number of converged runs recorded for each solver.
good.groupby(by='solver').size()









    Out[20]:





solver
BICGSTAB             6935
FIXED_POINT          2147
MINRES               5241
PSEUDOBLOCK_CG       3593
PSEUDOBLOCK_GMRES    6951
PSEUDOBLOCK_TFQMR    5827
TFQMR                5707
dtype: int64

So let's see how big a difference there is between TFQMR and PSEUDOBLOCK_TFQMR.



In [76]:

    
# Solvers to compare.
values = {"TFQMR", "PSEUDOBLOCK_TFQMR"}

# Per-run columns to keep. They are selected by name rather than by dropping
# the first 36 and last 3 columns by position, so the selection does not
# silently shift if the layout of `good` changes.
run_columns = ['np', 'solver', 'prec', 'status', 'time', 'iters', 'resid']

# Converged runs of the two solvers, restricted to the per-run columns.
tfqmr = good.loc[good['solver'].isin(values), run_columns]
tfqmr.info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 11534 entries, 25 to 275273
Data columns (total 7 columns):
np        11534 non-null int64
solver    11534 non-null object
prec      11534 non-null object
status    11534 non-null object
time      11534 non-null float64
iters     11534 non-null float64
resid     11534 non-null float64
dtypes: float64(3), int64(1), object(3)
memory usage: 720.9+ KB

This shows how much difference there is between the two solvers.



In [77]:

    
# Summary statistics of the numeric columns, computed separately per solver.
# The grouped object gets its own name instead of overwriting `tfqmr`, so
# `tfqmr` stays a DataFrame and this cell can be re-run without failing.
tfqmr_by_solver = tfqmr.groupby('solver')
tfqmr_by_solver.describe()









    Out[77]:






  
    
      
      
      iters
      np
      resid
      time
    
    
      solver
      
      
      
      
      
    
  
  
    
      PSEUDOBLOCK_TFQMR
      count
      5827.000000
      5827.000000
      5.827000e+03
      5827.000000
    
    
      mean
      669.441737
      5.921229
      5.432610e-07
      1.237965
    
    
      std
      1574.980811
      3.809055
      3.468492e-07
      7.532847
    
    
      min
      1.000000
      1.000000
      4.560000e-33
      0.000746
    
    
      25%
      13.000000
      2.000000
      2.220000e-07
      0.018921
    
    
      50%
      91.000000
      6.000000
      6.000000e-07
      0.085300
    
    
      75%
      382.000000
      10.000000
      8.735000e-07
      0.426127
    
    
      max
      9959.000000
      12.000000
      1.000000e-06
      271.173000
    
    
      TFQMR
      count
      5707.000000
      5707.000000
      5.707000e+03
      5707.000000
    
    
      mean
      669.346066
      5.915017
      5.414734e-07
      1.134524
    
    
      std
      1583.168929
      3.828605
      3.471924e-07
      7.684904
    
    
      min
      1.000000
      1.000000
      4.560000e-33
      0.000563
    
    
      25%
      13.000000
      2.000000
      2.190000e-07
      0.013752
    
    
      50%
      92.000000
      6.000000
      5.980000e-07
      0.062503
    
    
      75%
      387.000000
      10.000000
      8.720000e-07
      0.306544
    
    
      max
      9959.000000
      12.000000
      1.000000e-06
      288.867000



In [ ]:

		iters	np	resid	time
solver
PSEUDOBLOCK_TFQMR	count	5827.000000	5827.000000	5.827000e+03	5827.000000
	mean	669.441737	5.921229	5.432610e-07	1.237965
	std	1574.980811	3.809055	3.468492e-07	7.532847
	min	1.000000	1.000000	4.560000e-33	0.000746
	25%	13.000000	2.000000	2.220000e-07	0.018921
	50%	91.000000	6.000000	6.000000e-07	0.085300
	75%	382.000000	10.000000	8.735000e-07	0.426127
	max	9959.000000	12.000000	1.000000e-06	271.173000
TFQMR	count	5707.000000	5707.000000	5.707000e+03	5707.000000
	mean	669.346066	5.915017	5.414734e-07	1.134524
	std	1583.168929	3.828605	3.471924e-07	7.684904
	min	1.000000	1.000000	4.560000e-33	0.000563
	25%	13.000000	2.000000	2.190000e-07	0.013752
	50%	92.000000	6.000000	5.980000e-07	0.062503
	75%	387.000000	10.000000	8.720000e-07	0.306544
	max	9959.000000	12.000000	1.000000e-06	288.867000